All group randomization
pairwise_div_difference <- function(df){
df_div <- enframe(vegan::diversity(table(df$criteria, df$diagnosis)))
df_diff <- data.frame(t(combn(unique(sort(df$criteria)),2))) %>%
left_join(df_div, by = c("X1"="name")) %>%
left_join(df_div, by = c("X2"="name")) %>%
unite("pair", X1, X2, sep = ".") %>%
mutate(entropy_difference = value.x - value.y) %>%
select(-contains("value"))
deframe(df_diff)
}
# pairwise_div_difference(df)
enframe(pairwise_div_difference(df), name = "pair", value = "entropy_difference")
pairwise_div_iteration <- function(){
df %>%
count(criteria) %>%
mutate(diagnosis = map(n, function(x) sample(df$diagnosis, x, replace = T))) %>%
select(-n) %>%
unnest(diagnosis) %>%
pairwise_div_difference(.)
}
pairwise_div_iteration()
aha_kawasaki.eular_acr_sle aha_kawasaki.mcas_1
-2.358099e-03 -2.015759e-02
aha_kawasaki.mcas_2 aha_kawasaki.migraine
1.376623e-02 -2.365815e-03
aha_kawasaki.slicc_sle eular_acr_sle.mcas_1
-1.370551e-02 -1.779949e-02
eular_acr_sle.mcas_2 eular_acr_sle.migraine
1.612433e-02 -7.715884e-06
eular_acr_sle.slicc_sle mcas_1.mcas_2
-1.134741e-02 3.392382e-02
mcas_1.migraine mcas_1.slicc_sle
1.779177e-02 6.452076e-03
mcas_2.migraine mcas_2.slicc_sle
-1.613205e-02 -2.747174e-02
migraine.slicc_sle
-1.133970e-02
set.seed(1234)
pairs_permutation_distributions <- data.frame(t(replicate(10000, pairwise_div_iteration())))
sapply(combn(unique(sort(df$criteria)),2, simplify = F), paste, collapse = ".")
[1] "aha_kawasaki.eular_acr_sle" "aha_kawasaki.mcas_1" "aha_kawasaki.mcas_2" "aha_kawasaki.migraine" "aha_kawasaki.slicc_sle" "eular_acr_sle.mcas_1"
[7] "eular_acr_sle.mcas_2" "eular_acr_sle.migraine" "eular_acr_sle.slicc_sle" "mcas_1.mcas_2" "mcas_1.migraine" "mcas_1.slicc_sle"
[13] "mcas_2.migraine" "mcas_2.slicc_sle" "migraine.slicc_sle"
names(pairs_permutation_distributions)
[1] "aha_kawasaki.eular_acr_sle" "aha_kawasaki.mcas_1" "aha_kawasaki.mcas_2" "aha_kawasaki.migraine" "aha_kawasaki.slicc_sle" "eular_acr_sle.mcas_1"
[7] "eular_acr_sle.mcas_2" "eular_acr_sle.migraine" "eular_acr_sle.slicc_sle" "mcas_1.mcas_2" "mcas_1.migraine" "mcas_1.slicc_sle"
[13] "mcas_2.migraine" "mcas_2.slicc_sle" "migraine.slicc_sle"
pairs_entropy_diff <- enframe(pairwise_div_difference(df), name = "pair", value = "entropy_difference") %>%
mutate(entropy_difference = ifelse(entropy_difference >=0, 0-entropy_difference, entropy_difference))
pairs_entropy_diff
pairs_permutation_distributions %>%
pivot_longer(everything(), names_to = "pair", values_to = "entropy_difference") %>%
ggplot(aes(x = entropy_difference)) +
geom_histogram(binwidth = 0.01)+
geom_vline(data = pairs_entropy_diff, aes(xintercept = entropy_difference), color = "red")+
facet_wrap(~pair, scales = "free")+
theme_classic()

# Calculate p-values for diversity differences based on the quantile of the
# diversity difference value compared to the null hypothesis diversity distribution values
# Pairwise comparison adjusted
entropy_diff_table <- data.frame(pairs_permutation_distributions) %>%
pivot_longer(everything(), names_to = "pair", values_to = "entropy_difference") %>%
group_by(pair) %>%
summarise(entropy_difference_distribution = list(entropy_difference)) %>%
right_join(pairs_entropy_diff) %>%
mutate(p_value = map2_dbl(entropy_difference, entropy_difference_distribution, ~ecdf(.y)(.x))) %>%
mutate(p_adj = p.adjust(p_value, method = "bonferroni", n = n()))
Joining with `by = join_by(pair)`
entropy_diff_table
# Plot diversity values
broom::tidy(div_diag) %>%
mutate(names = tolower(names)) %>%
ggplot(aes(x=names, y = x))+
geom_bar(stat = "identity") +
ggpubr::stat_pvalue_manual(
entropy_diff_table %>% separate(pair, into = c("group1", "group2"), sep = "\\."),
label= "p_adj",
y.position = 7, step.increase = 0.2
) +
theme_bw() +
labs(x = "", y = "Shannon diversity") +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
Warning: 'tidy.numeric' is deprecated.
See help("Deprecated")

Pairwise randomization
pairs_list <- as.list(as.data.frame(combn(unique(df$criteria),2)))
names(pairs_list) <- sapply(pairs_list, paste, collapse = ".")
pairs_list
# Create function that subsets df of all diagnoses for a criteria based on a pair of two criteria to compare
# Then randomizes the diagnoses across the two criteria,
# measures the resulting diversity of each criteria's diagnoses
# and calculates a difference in the diversity value
# randomize can be set F to obtain original difference in diversity w/o randomization
# To be used with replicate to generate a null hypothesis distribution
permutation_pair_iteration <- function(pair, randomize = T){
df <- df %>%
filter(criteria %in% pair)
if (randomize == T){
df <- df %>% mutate(diagnosis = sample(diagnosis, n()))
}
div <- vegan::diversity(table(df$criteria, df$diagnosis))
unname(div[1] - div[2])
}
replicate(10,permutation_pair_iteration(pairs_list[[1]], randomize = T))
replicate(10,permutation_pair_iteration(pairs_list[[1]], randomize = F))
# # Create a null hypothesis distribution for the difference in diversity
# # For all pairs of criteria
# set.seed(1234)
# pairs_permutation_distributions <- lapply(pairs_list, function(x) replicate(10000, permutation_pair_iteration(x)))
# names(pairs_permutation_distributions) <- lapply(pairs_list, function(x) paste(x, collapse = "-"))
# data.frame(pairs_permutation_distributions)
# write_csv(data.frame(pairs_permutation_distributions), here("data/criteria_pairwise_null_difference_distributions.csv"))
pairs_permutation_distributions <- read_csv(here("data/criteria_pairwise_null_difference_distributions.csv"))
Rows: 10000 Columns: 15── Column specification ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
dbl (15): mcas_1.mcas_2, mcas_1.aha_kawasaki, mcas_1.eular_acr_sle, mcas_1.migraine, mcas_1.slicc_sle, mcas_2.aha_kawasaki, mcas_2.eular_acr_sle, mcas_2.migraine, mcas_2.slicc_sle, aha_kawasaki.eular_...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
pairs_permutation_distributions
# Create a df of the actual difference in diversity between pairs of criteria
pairs_entropy_diff <- lapply(pairs_list, permutation_pair_iteration, randomize = F)
names(pairs_entropy_diff) <- lapply(pairs_list, function(x) paste(x, collapse = "-"))
pairs_entropy_diff <- data.frame(pairs_entropy_diff) %>%
pivot_longer(everything(), names_to = "pair", values_to = "entropy_difference") %>%
mutate(entropy_difference = ifelse(entropy_difference >=0, 0-entropy_difference, entropy_difference))
pairs_entropy_diff
# Create plots of null hypothesis distributions, overlaid with actual diversity difference
pairs_permutation_distributions %>%
pivot_longer(cols = everything(), names_to = "pair", values_to = "entropy_difference") %>%
ggplot(aes(x = entropy_difference)) +
geom_histogram(binwidth = 0.01)+
geom_vline(data = pairs_entropy_diff, aes(xintercept = entropy_difference), color = "red")+
facet_wrap(~pair, scales = "free")+
theme_classic()

# Calculate p-values for diversity differences based on the quantile of the
# diversity difference value compared to the null hypothesis diversity distribution values
# Pairwise comparison adjusted
data.frame(pairs_permutation_distributions) %>%
pivot_longer(everything(), names_to = "pair", values_to = "entropy_difference") %>%
group_by(pair) %>%
summarise(entropy_difference_distribution = list(entropy_difference)) %>%
right_join(pairs_entropy_diff) %>%
mutate(p_value = map2_dbl(entropy_difference, entropy_difference_distribution, ~ecdf(.y)(.x))) %>%
mutate(p_adj = p.adjust(p_value, method = "bonferroni", n = n()))
Joining with `by = join_by(pair)`